package com.ml4ai.backend.services.impl;

import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.net.HttpRequest;
import com.ml4ai.backend.services.DbMapService;
import com.ml4ai.backend.stack.webcollector.Ml4aiAutoParseCrawler;
import com.ml4ai.backend.stack.webcollector.StorageAdapterDBManager;
import com.ml4ai.backend.utils.SpringUtils;
import lombok.extern.slf4j.Slf4j;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Crawler that forwards every visited page to a caller-supplied
 * {@link ExecuteOnSession} callback and keeps its crawl state in maps obtained
 * from {@link DbMapService}, named per task id.
 *
 * <p>Created by leecheng on 2018/10/19.
 */
@Slf4j
public class WebPageGet extends Ml4aiAutoParseCrawler {

    /** Value sent as the {@code User-Agent} header on every request. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36";

    /**
     * Value sent as the {@code Cookie} header on every request.
     *
     * <p>NOTE(review): this literal embeds a {@code remember_user_token} and a
     * {@code _session_id}, i.e. account credentials committed to source control.
     * It is kept byte-for-byte to preserve behaviour, but should be moved to
     * external configuration and the token rotated.
     */
    private static final String COOKIE = "media_domain=-14.mbsdfg555.online; _ga=GA1.2.843807031.1540106236; _gid=GA1.2.1020974170.1540106236; remember_user_token=W1s1OTk4N10sIiQyYSQxMSRUN3k5dzl1bVJ5ajdNMkdmNU0wWndlIiwiMTU0MDExMTEzNS4xNDI3MTc0Il0%3D--1c05f4744b26dda0e2f133494125ce0bffbcffcb; _session_id=1daa6e019b27b78353cce0aa1a0bf644";

    /** Thread count applied to every instance by the constructor. */
    private static final int THREAD_COUNT = 10;

    /** Callback that receives each page handed to {@link #visit(Page, CrawlDatums)}. */
    @FunctionalInterface
    public interface ExecuteOnSession {
        /**
         * Handles one visited page.
         *
         * @param page the page passed to {@code visit}
         * @param next the {@link CrawlDatums} instance passed to {@code visit}
         */
        void arrived(Page page, CrawlDatums next);
    }

    /**
     * Handler invoked from {@link #visit(Page, CrawlDatums)}. Public and mutable,
     * so it may be {@code null} or replaced after construction.
     */
    public ExecuteOnSession executeOnSession;

    /**
     * Forwards the page to {@link #executeOnSession}. When no handler is set the
     * page is skipped with a warning instead of failing with a
     * {@link NullPointerException}.
     *
     * @param page the visited page
     * @param next passed through unchanged to the handler
     */
    @Override
    public void visit(Page page, CrawlDatums next) {
        // Read the public field once so the null check and the call see the same
        // reference even if another thread reassigns the field in between.
        ExecuteOnSession handler = executeOnSession;
        if (handler == null) {
            log.warn("No ExecuteOnSession handler configured; skipping visited page");
            return;
        }
        handler.arrived(page, next);
    }

    /**
     * Creates a crawler for the given task: registers the seeds, marks the crawl
     * as resumable, registers the regexes and sets the thread count to
     * {@value #THREAD_COUNT}.
     *
     * @param taskId           suffix used to name the four backing maps
     *                         ({@code crawl.<taskId>}, {@code fetcher.<taskId>},
     *                         {@code links.<taskId>}, {@code redirect.<taskId>})
     * @param executeOnSession handler stored in {@link #executeOnSession}
     * @param autoParse        passed through to the superclass constructor
     * @param seeds            URLs registered via {@code addSeed}; {@code null} is
     *                         treated as no seeds
     * @param regexs           patterns registered via {@code addRegex};
     *                         {@code null} is treated as no patterns
     */
    public WebPageGet(String taskId, ExecuteOnSession executeOnSession, boolean autoParse, List<String> seeds, String... regexs) {
        super(buildDbManager(taskId), autoParse);
        this.executeOnSession = executeOnSession;
        if (seeds != null) {
            for (String url : seeds) {
                this.addSeed(url);
            }
        }
        super.setResumable(true);
        if (regexs != null) {
            for (String regex : regexs) {
                this.addRegex(regex);
            }
        }
        this.setThreads(THREAD_COUNT);
    }

    /** Builds the DB manager from the four task-scoped maps, all named {@code <prefix>.<taskId>}. */
    private static StorageAdapterDBManager buildDbManager(String taskId) {
        String suffix = "." + taskId;
        // Argument order matters: it fixes both the constructor parameter each
        // map is bound to and the order in which getMap logs the names.
        return new StorageAdapterDBManager(
                getMap("crawl" + suffix),
                getMap("fetcher" + suffix),
                getMap("links" + suffix),
                getMap("redirect" + suffix));
    }

    /**
     * Logs the map name at INFO level and returns the map that
     * {@link DbMapService#generateLocalMapWrapper} yields for it.
     *
     * @param name name of the map to obtain
     * @return the map returned by the {@link DbMapService} bean for {@code name}
     */
    public static Map<String, String> getMap(String name) {
        log.info(name);
        return SpringUtils.getService(DbMapService.class).generateLocalMapWrapper(name);
    }

    /**
     * Issues the request for the given datum with the fixed {@code User-Agent}
     * and {@code Cookie} headers attached.
     *
     * @param crawlDatum the datum to request
     * @return the page produced by {@link HttpRequest#responsePage()}
     * @throws Exception propagated from building or executing the request
     */
    @Override
    public Page getResponse(CrawlDatum crawlDatum) throws Exception {
        HttpRequest httpRequest = new HttpRequest(crawlDatum);
        httpRequest.addHeader("User-Agent", USER_AGENT);
        httpRequest.addHeader("Cookie", COOKIE);
        return httpRequest.responsePage();
    }
}
